Data visualization

relationship between independent and dependent variables
red represents bad values, green represents good values

Preparation

Set up Hidden layer types

Set up functions

#x is hidden layer type; k is k folds;
#x is a vector: x[1] = number of hidden layers, x[2] = nodes per layer;
#k is the number of cross-validation folds.
#Returns a list (names added; positional access [[1]]..[[3]] unchanged):
#  [[1]] MSE      - per-fold mean squared errors
#  [[2]] ave.RMSE - overall root mean squared error across folds
#  [[3]] va.RMSE  - variance of the per-fold RMSE values
cv.err<-function(x,data=data3,k=10,seed.val=1)
{
  set.seed(seed.val)
  #randomly shuffle the data
  data.rd<-data[sample(nrow(data)),]
  #Create k equally sized folds
  folds<-cut(seq_len(nrow(data.rd)),breaks=k,labels=FALSE)
  layer.val<-x[1] # number of hidden layers
  nodes.val<-x[2] # nodes per layer
  #Parallel computing; .combine="c" collects the fold MSEs into a vector
  MSE<-foreach(i=1:k,.combine="c") %dopar%
  {
    set.seed(seed.val)
    test_ind<-which(folds==i,arr.ind=TRUE)
    testData<-data.rd[test_ind,]
    trainData<-data.rd[-test_ind,]
    #Fit the network on the training folds
    nn.model<-neuralnet(minref+maxtrans~a+b+tBCB+r_MDA,data=trainData,
                        hidden=rep(nodes.val,layer.val),stepmax=1e+05,
                        algorithm="rprop+",err.fct="sse",
                        act.fct="logistic",linear.output=FALSE)
    #Predict on the held-out fold (columns 1:4 are the predictors)
    nn.pred<-compute(nn.model,testData[1:4])
    #Mean squared error of this fold, summed over both responses
    #(columns 5 and 8 hold minref and maxtrans - TODO confirm)
    (1/nrow(testData))*(sum((testData[,5]-nn.pred$net.result[,1])^2)+
                          sum((testData[,8]-nn.pred$net.result[,2])^2))
  }
  #Overall root mean square error across the k folds
  ave.RMSE<-sqrt(sum(MSE)/k)
  #Variance of the per-fold RMSE values
  va.RMSE<-var(sqrt(MSE))
  return(list(MSE=MSE,ave.RMSE=ave.RMSE,va.RMSE=va.RMSE))
}


#Fit the same network structure `rep` times and return the fitted
#models as a list.  x[1] = number of hidden layers, x[2] = nodes per
#layer.  NOTE(review): set.seed() in the master session does not pin
#the RNG streams of the %dopar% workers, so individual fits may vary
#between runs - confirm whether exact reproducibility is required.
model.rep<-function(x,data=data3,rep=10,seed.val=1)
{
  set.seed(seed.val)
  n.layers<-x[1]
  n.nodes<-x[2]
  #Train `rep` independent models in parallel; foreach without a
  #.combine argument collects the results into a list
  fitted<-foreach(i=1:rep) %dopar%
  {
    neuralnet(minref+maxtrans~a+b+tBCB+r_MDA,data=data,
              hidden=rep(n.nodes,n.layers),stepmax=1e+05,
              algorithm="rprop+",err.fct="sse",
              act.fct="logistic",linear.output=FALSE)
  }
  return(fitted)
}

Implementation

Parallel computing RMSE and variation for each hidden layer type

#setup cluster: 4 workers, registered as the foreach backend
cl<-makeCluster(4)
registerDoParallel(cl)

time1<-Sys.time()
#Parallel computing MSE & overall RMSE & RMSE variation for each of
#the 11 candidate hidden-layer structures in `hid`
#Assign given package(s) to each core
cv.info<-foreach(i=1:11,.packages=c("neuralnet","doParallel")) %dopar%
{
  cv.err(hid[i,],seed.val=2)
}
time2<-Sys.time()
time2-time1
## Time difference of 2.433182 mins
#extract overall RMSE for each structure
#(plain vapply: dispatching parallel tasks for a list lookup only adds overhead)
ave.RMSE<-vapply(cv.info,function(res) res[[2]],numeric(1))
#extract variation of RMSE for each structure
va.RMSE<-vapply(cv.info,function(res) res[[3]],numeric(1))
print(ave.RMSE)
##  [1] 0.05549995 0.05246400 0.05305360 0.05091927 0.07289841 0.05397246
##  [7] 0.04918410 0.15873826 0.04835688 0.05281400 0.09431314
print(va.RMSE)
##  [1] 0.0001692588 0.0001967575 0.0001566363 0.0001665786 0.0011950288
##  [6] 0.0002769358 0.0002918778 0.0031927483 0.0001796870 0.0002025642
## [11] 0.0035502518

Choose the best 5 hidden layer types based on the rank of RMSE and their variation

#RMSE summary table, one row per candidate structure, ordered by
#overall RMSE (ave.RMSE and va.RMSE come from the extraction above)
cv.table<-hid %>%
  mutate(ave.RMSE=ave.RMSE,va.RMSE=va.RMSE) %>%
  arrange(ave.RMSE)
#Keep the structures whose RMSE and RMSE variation are both among the
#6 lowest values (subset() preserves the row names, like `[` did)
cv.table1<-subset(cv.table,rank(ave.RMSE)<=6&rank(va.RMSE)<=6)
print(cv.table1)
##   layers nodes   ave.RMSE      va.RMSE
## 1      4     5 0.04835688 0.0001796870
## 3      2     5 0.05091927 0.0001665786
## 4      1     6 0.05246400 0.0001967575
## 5      5     3 0.05281400 0.0002025642
## 6      2     3 0.05305360 0.0001566363

Repeat training for hidden layer types selected each with 10 times, extract errors for each repetition

#Repeat model 10 times for each hidden layer structure and save as list
#nn.model contains 5 hidden layer structures where
#each structure has 10 models
#i.e. nn.model[[i]][[j]] represents the jth repetition for the ith structure
#.packages loads neuralnet/doParallel on the workers: model.rep calls
#both neuralnet() and foreach(), which are not attached on a fresh worker
nn.model<-foreach(i=1:5,.packages=c("neuralnet","doParallel")) %dopar%
{
  model.rep(cv.table1[i,],seed.val=2)
}

#Extract the final training error of every fitted model, save as data.frame
#ith row represents ith structure, jth column represents jth repetition
nn.res<-foreach(i=seq_along(nn.model),.combine="rbind") %dopar%
{
  #Inner loop uses %do%: a %dopar% nested inside a worker has no
  #registered backend there and falls back to sequential with a warning
  foreach(j=seq_along(nn.model[[i]]),.combine="c") %do%
  {
    nn.model[[i]][[j]]$result.matrix[1,]
  }
}

Generate grid prediction using above 50 models

#Grid prediction part
#Set up the prediction grid over the four predictors (digits rounded)
grid<-expand.grid(a=round(seq(0.0,0.9,0.045),3),b=round(seq(0,0.9,0.045),3),
                  tBCB=round(seq(0.1,0.9,0.04),3),r_MDA=seq(0.61111,0.77778,0.0083335))

#grid prediction: one data.frame per model - the grid plus both
#predicted responses; .packages loads neuralnet on each worker
grid.pred<-foreach(i=seq_along(nn.model),.packages="neuralnet") %dopar%
{
  #Inner %do%: nested %dopar% has no backend on the worker
  foreach(j=seq_along(nn.model[[i]])) %do%
  {
    #Run compute() once and reuse the result for both response
    #columns (the original called it twice per model)
    pred<-neuralnet::compute(nn.model[[i]][[j]],grid[1:4])$net.result
    cbind(grid,minref=pred[,1],maxtrans=pred[,2])
  }
}

Extract 5 rows corresponding to 5 lowest “minref” values and 5 highest “maxtrans” values respectively for each model
Combine them as a new subset
Here we check the consistency between the “minref” and “maxtrans” variables in this new subset; that is, we want to see whether, in most cases, both dependent variables reach their optimal points simultaneously. We check this by examining whether “maxtrans” attains its highest value at the points where “minref” reaches its lowest value.

#subsets corresponding to the 5 lowest minref and 5 highest maxtrans
#values of each model; plain lapply is enough here - the filters are
#trivial, and the nested %dopar% loops had no registered backend on
#the workers and ran sequentially anyway
min.5ref<-lapply(grid.pred,function(model.preds)
{
  lapply(model.preds,function(pred.df)
  {
    #5 lowest minref values' subset
    filter(pred.df,rank(minref)<=5)
  })
})
max.5tra<-lapply(grid.pred,function(model.preds)
{
  lapply(model.preds,function(pred.df)
  {
    #5 highest maxtrans values' subset
    filter(pred.df,rank(-maxtrans)<=5)
  })
})

#combine subsets to see consistency
#combine row values as data.frame
grid.comb<-foreach(i=seq_along(grid.pred),.combine="rbind") %dopar%
{
  #Inner %do%: nested %dopar% has no backend on the worker; the body
  #only uses base cbind/paste0, so no .packages are needed
  foreach(j=seq_along(grid.pred[[i]]),.combine="rbind") %do%
  {
    #structure labels the hidden layer structure, rep the jth repetition
    cbind(min.5ref[[i]][[j]],max.5tra[[i]][[j]],
          structure=paste0(cv.table1[i,]$layers,"layers",cv.table1[i,]$nodes,"nodes"),
          rep=j)
  }
}
# save results to the file
# write.csv(grid.comb,"grid_comb.csv")

Contour plot

Generate contour plot for grid prediction choosing several combinations

Part of examples

minref contour plot with a=b=0
maxtrans contour plot with a=b=0

minref contour plot with tBCB=0.30&r_MDA=0.7778
maxtrans contour plot with tBCB=0.30&r_MDA=0.7778

Algorithm contrast on time complexity

Function preparation

#Fit one fixed 5-layer (4 nodes each) network on data3 and return it.
#`x` is unused but kept so the function can be driven uniformly by
#for/apply/lapply/foreach in the timing comparison below.
#(The original wrapped the fit in Sys.time() calls but discarded the
#difference; that dead timing code has been removed.)
nn<-function(x)
{
  set.seed(11)
  nn<-neuralnet(minref+maxtrans~a+b+tBCB+r_MDA,data=data3,
                hidden=c(4,4,4,4,4),stepmax=1e+05,
                algorithm="rprop+",err.fct="sse",
                act.fct="logistic",linear.output=FALSE)
  return(nn)
}

Contrast among “for”, “apply”, “lapply” and “foreach”

#Timing comparison: fit the same network 10 times with four different
#iteration mechanisms (for / lapply / apply / foreach %dopar%)
model<-list()
time1<-Sys.time()
#NOTE(review): `x` is never defined at this point, but nn() ignores
#its argument and R evaluates arguments lazily, so no error occurs -
#passing NULL explicitly would be clearer
for(i in 1:10)
{
  model[[i]]<-nn(x)
}
time2<-Sys.time()
time2-time1
## Time difference of 50.48314881 secs
time5<-Sys.time()
#lapply over the 10 previously fitted models; the elements serve only
#as the ignored argument of nn()
model<-lapply(model,nn)
time6<-Sys.time()
time6-time5
## Time difference of 48.3016541 secs
m<-matrix(0,10,1)
time7<-Sys.time()
#apply over 10 dummy rows - again the argument is ignored
model<-apply(m,1,nn)
time8<-Sys.time()
time8-time7
## Time difference of 47.49610519 secs
#Parallel version: 4 workers, neuralnet loaded on each via .packages
cl<-makeCluster(4)
registerDoParallel(cl)
time3<-Sys.time()
model<-foreach(i=1:10,.packages="neuralnet") %dopar%
{
  nn(x)
}
time4<-Sys.time()
time4-time3
## Time difference of 20.81961894 secs
stopCluster(cl)